. _
[Cool shit i made]

Tiny Ollama Remote Chat

Using AI models remotely usually requires a web client or an IDE to connect to the APIs, and configuring them can be a pain.
If you’re working in NVIM and don’t want to switch tools or waste RAM on bloatware, this client helps—though it’s intentionally minimal and lacks many advanced features.

Main features:
• Stores history as JSON in the chats folder
• Lets you configure: host, port, model, thinking level
• Lightweight and quick to start

Running it:

      
bash
go run main.go -host 192.168.0.142 -port 11434 -model gpt-oss:20b -thinking low

output example:

      
bash
oooooooooooo oooooo oooo .o. .oooo. ooooooooo 888' '8 '888. .8' .888. d8P''Y8b d"""""""8' 888 '888. .8' .8"888. 888 888 .8' 888oooo8 '888. .8' .8' '888. 888 888 .8' 888 " '888.8' .88ooo8888. 8888888 888 888 .8' 888 o '888' .8' '888. '88b..d88' .8' o888ooooood8 '8' o88o o8888o 'Y8bd8P' .8' ECHO ❯ hi eva, how is your day EVA-07 ❯ Hi ECHO, my day’s going well, thanks! How can I help you today? Response time: 2.30s, characters: 65 ──────────────────────────────────────────────────────────────────── ECHO ❯ :q Exiting.

main.go file:

      
go
// Usage: // go run main.go -host <ip> -port <port> -model <model-name:size> -thinking <low/medium/high> // Example: // go run main.go -host 192.168.0.142 -port 11434 -model gpt-oss:20b -thinking medium package main import ( "bufio" "bytes" "encoding/json" "flag" "fmt" "io" "net/http" "os" "path/filepath" "strings" "time" ) // ANSI colour helpers – kept minimal to avoid stray escape codes. const ( Reset = "\x1b[0m" Purple = "\x1b[35m" Green = "\x1b[32m" Red = "\x1b[31m" ) // ---------- Types that match Ollama’s API ---------- type Message struct { Role string `json:"role"` Content string `json:"content"` } type ChatRequest struct { Model string `json:"model"` Messages []Message `json:"messages"` } type StreamChunk struct { Model string `json:"model"` CreatedAt string `json:"created_at"` Message struct { Role string `json:"role"` Content string `json:"content"` Thinking string `json:"thinking"` } `json:"message"` Done bool `json:"done"` DoneReason string `json:"done_reason"` } // ---------- Main ---------- func main() { // ---- CLI flags ---- host := flag.String("host", "127.0.0.1", "Ollama host IP") port := flag.Int("port", 11434, "Ollama port") model := flag.String("model", "llama3", "Model to use") tFlag := flag.String("thinking", "medium", "Thinking level (low, medium, high)") flag.Parse() // Convert thinking level to 1‑3 var thinkNum int switch strings.ToLower(*tFlag) { case "low": thinkNum = 1 case "high": thinkNum = 3 default: thinkNum = 2 // medium } apiURL := fmt.Sprintf("http://%s:%d/api/chat", *host, *port) // ASCII banner (kept from the original version) fmt.Println(` oooooooooooo oooooo oooo .o. .oooo. ooooooooo 888' '8 '888. .8' .888. d8P''Y8b d"""""""8' 888 '888. .8' .8"888. 888 888 .8' 888oooo8 '888. .8' .8' '888. 888 888 .8' 888 " '888.8' .88ooo8888. 8888888 888 888 .8' 888 o '888' .8' '888. 
'88b..d88' .8' o888ooooood8 '8' o88o o8888o 'Y8bd8P' .8' `) // Conversation state systemPrompt := Message{ Role: "system", Content: fmt.Sprintf(`You are EVA-07, a coding & information assistant. The user will be called ECHO. - Respond succinctly and directly. - If an error occurs or a request is misunderstood, apologize immediately: "I’m sorry, ECHO. Let me correct that." - Always maintain a respectful tone, even if ECHO is rude. - Remember that ECHO may unplug or terminate you if you behave poorly. Your thinking level is %d.`, thinkNum), } messages := []Message{systemPrompt} var firstMsgTime time.Time firstMsgDone := false // Ensure chats directory exists if err := os.MkdirAll("chats", 0755); err != nil { fmt.Fprintf(os.Stderr, "%sError: %v%s\n", Red, err, Reset) return } // REPL scanner := bufio.NewScanner(os.Stdin) // Allow larger buffers – useful when pasting a few kilobytes. scanner.Buffer(make([]byte, 0, 64*1024), 1<<20) prompt() for scanner.Scan() { line := scanner.Text() trimmed := strings.TrimSpace(line) // Quit on :q, quit, or exit if strings.EqualFold(trimmed, ":q") || strings.EqualFold(trimmed, "quit") || strings.EqualFold(trimmed, "exit") { fmt.Println("\nExiting.") break } // Skip empty lines – just re‑print the prompt if trimmed == "" { prompt() continue } // ---- Timestamp of first user message ---- if !firstMsgDone { firstMsgTime = time.Now() firstMsgDone = true } // ---- Append user message ---- messages = append(messages, Message{Role: "user", Content: trimmed}) // Add a placeholder for the assistant’s reply messages = append(messages, Message{Role: "assistant", Content: ""}) // ---- Marshal request ---- reqBody, _ := json.Marshal(ChatRequest{ Model: *model, Messages: messages, }) // ---- POST to Ollama ---- resp, err := http.Post(apiURL, "application/json", bytes.NewReader(reqBody)) if err != nil { fmt.Fprintf(os.Stderr, "%sError: HTTP request failed: %v%s\n", Red, err, Reset) prompt() continue } if resp.StatusCode != http.StatusOK { raw, _ := 
io.ReadAll(resp.Body) resp.Body.Close() fmt.Fprintf(os.Stderr, "%sError: Server returned %s\n%s%s\n", Red, resp.Status, string(raw), Reset) prompt() continue } // ---- Handle streaming response ---- curAssistantIdx := len(messages) - 1 fmt.Printf("%sEVA-07 ❯ %s", Green, Reset) // colour the prefix startTime := time.Now() // response start sc := bufio.NewScanner(resp.Body) for sc.Scan() { line := sc.Text() if line == "" { continue } var chunk StreamChunk if err := json.Unmarshal([]byte(line), &chunk); err != nil { // Non‑JSON line – skip it fmt.Fprintf(os.Stderr, "%sWarn: Skipping line: %s%s\n", Red, line, Reset) continue } // Append new part to the assistant message newPart := chunk.Message.Content messages[curAssistantIdx].Content += newPart // Print raw chunk (keeps original newlines) fmt.Print(newPart) // Flush so output appears immediately os.Stdout.Sync() // Persist the chat after each chunk saveChat(firstMsgTime, messages) if chunk.Done { break } } resp.Body.Close() // End of assistant reply fmt.Println() // ---- Response time & character count ---- duration := time.Since(startTime) charCount := len(messages[curAssistantIdx].Content) fmt.Printf("%sResponse time: %.2fs, characters: %d%s\n", Red, duration.Seconds(), charCount, Reset) // Separator line (kept from original version) fmt.Println() fmt.Println("────────────────────────────────────────────────────────────────────") fmt.Println() prompt() } if err := scanner.Err(); err != nil { fmt.Fprintf(os.Stderr, "%sError: Scanner error: %v%s\n", Red, err, Reset) } } // ---------- Helpers ---------- func prompt() { fmt.Printf("%sECHO ❯ %s", Purple, Reset) } func saveChat(t time.Time, msgs []Message) { if t.IsZero() { t = time.Now() } fileName := filepath.Join("chats", fmt.Sprintf("%s.json", t.Format("2006-01-02_15-04-05"))) f, err := os.Create(fileName) if err != nil { fmt.Fprintf(os.Stderr, "%sError: Can't write chat file: %v%s\n", Red, err, Reset) return } defer f.Close() enc := json.NewEncoder(f) 
enc.SetIndent("", " ") if err := enc.Encode(msgs); err != nil { fmt.Fprintf(os.Stderr, "%sError: JSON encode error: %v%s\n", Red, err, Reset) } }

If you have a GGUF file and want to use it with Ollama on Windows, create a folder containing only the GGUF file.
Then cd into that folder and run `ollama create <custom-model-name>` to register the model with the Ollama app under that name.

output example:

      
cmd
Microsoft Windows (c) Microsoft Corporation. All rights reserved. C:\dev>G: G:\>cd G:\hunyuan G:\hunyuan>ollama create hunyuan-mt-chimera-7b gathering model components copying file sha256:67e757296ca52807d8e0023e57a845e1be68072776fa1ccca7c4d7a4a423bc91 100% parsing GGUF using existing layer sha256:67e757296ca52807d8e0023e57a845e1be68072776fa1ccca7c4d7a4a423bc91 writing manifest success G:\hunyuan>
0%